Setup chunk
Load libraries
knitr::opts_chunk$set(fig.width = 8)
knitr::opts_knit$set(root.dir = normalizePath(".."))
knitr::opts_knit$get("root.dir")
[1] "/nas/groups/treutlein/USERS/tomasgomes/projects/pallium_evo"
Set colours for cell types and regions
meta = read.csv("data/annotations/axolotl_all_umeta.csv",
header = T, row.names = 1)
cols_cc = c(
#epen
"#12400c", "#2d6624","#1d4f15", "#174711", "#2d6624", "#3d7f33", "#3b7b30", "#468b3b", "#4f9843","#5dae50", "#66bb58", "#72cd64", "#306a26", "#78d669", "#81e472",
#gaba
"#700209", "#75090e","#7a0f13", "#801517", "#851a1b", "#8a1f1f", "#902423", "#952927", "#9a2d2c","#a03230", "#a53634", "#aa3a39", "#b03f3d","#b54342", "#ba4846", "#c04c4b", "#c5504f", "#ca5554", "#d05959", "#d55e5e","#73050c", "#780c11","#8d2221", "#982b2a","#a23432", "#a83837", "#b2413f", "#b84544", "#bd4a49", "#c85352", #"#cd5756",
#glut
"#054674", "#134d7b","#1d5481", "#265a88", "#2e618e", "#73a4cb", "#366995", "#3e709c", "#4677a2","#4d7ea9", "#5586b0", "#5c8db7", "#6495bd","#6b9cc4", "#7bacd2", "#8ebfe4", "#96c7eb", "#9ecff2", "#18507e", "#18507e","#2a5e8b", "#497ba6","#5889b3", "#6fa0c8","#7fafd6", "#6091ba", "#5182ac", "#3a6c98", "#a6d7f9",
#npc
"#ffb120", "#feb72a","#fdbc34", "#fcc13d", "#fbc745", "#facc4e", "#f9d156", "#f8d65f", "#f8da68","#f7df70", "#f7e479", "#f7e882", "#f7ed8a", "#f7f193", "#eca319"
)
ccnames = unique(sort(meta$cellclusters))
names(cols_cc) = c(ccnames[grepl("epen", ccnames)], ccnames[grepl("GABA", ccnames)],ccnames[grepl("glut", ccnames)],ccnames[grepl("npc", ccnames)])
reg_cols = c("other/unknown_pred" = "#C7CCC7",
"medial" = "#52168D", "medial_pred" = "#661CB0",
"dorsal" = "#C56007", "dorsal_pred" = "#ED7307",
"lateral" = "#118392", "lateral_pred" = "#16A3B6")
reg_cols_simp = c("medial" = "#52168D", "dorsal" = "#C56007", "lateral" = "#118392")
colors <- colorRampPalette(brewer.pal(11,'Spectral')[-6])(100)
Load data
meta = read.csv("data/annotations/axolotl_all_umeta.csv",
header = T, row.names = 1)
cols_cc = c(
#epen
"#12400c", "#2d6624","#1d4f15", "#174711", "#2d6624", "#3d7f33", "#3b7b30", "#468b3b", "#4f9843","#5dae50", "#66bb58", "#72cd64", "#306a26", "#78d669", "#81e472",
#gaba
"#700209", "#75090e","#7a0f13", "#801517", "#851a1b", "#8a1f1f", "#902423", "#952927", "#9a2d2c","#a03230", "#a53634", "#aa3a39", "#b03f3d","#b54342", "#ba4846", "#c04c4b", "#c5504f", "#ca5554", "#d05959", "#d55e5e","#73050c", "#780c11","#8d2221", "#982b2a","#a23432", "#a83837", "#b2413f", "#b84544", "#bd4a49", "#c85352", #"#cd5756",
#glut
"#054674", "#134d7b","#1d5481", "#265a88", "#2e618e", "#73a4cb", "#366995", "#3e709c", "#4677a2","#4d7ea9", "#5586b0", "#5c8db7", "#6495bd","#6b9cc4", "#7bacd2", "#8ebfe4", "#96c7eb", "#9ecff2", "#18507e", "#18507e","#2a5e8b", "#497ba6","#5889b3", "#6fa0c8","#7fafd6", "#6091ba", "#5182ac", "#3a6c98", "#a6d7f9",
#npc
"#ffb120", "#feb72a","#fdbc34", "#fcc13d", "#fbc745", "#facc4e", "#f9d156", "#f8d65f", "#f8da68","#f7df70", "#f7e479", "#f7e882", "#f7ed8a", "#f7f193", "#eca319"
)
ccnames = unique(sort(meta$cellclusters))
names(cols_cc) = c(ccnames[grepl("epen", ccnames)], ccnames[grepl("GABA", ccnames)],ccnames[grepl("glut", ccnames)],ccnames[grepl("npc", ccnames)])
reg_cols = c("other/unknown_pred" = "#C7CCC7",
"medial" = "#52168D", "medial_pred" = "#661CB0",
"dorsal" = "#C56007", "dorsal_pred" = "#ED7307",
"lateral" = "#118392", "lateral_pred" = "#16A3B6")
reg_cols_simp = c("medial" = "#52168D", "dorsal" = "#C56007", "lateral" = "#118392")
colors <- colorRampPalette(brewer.pal(11,'Spectral')[-6])(100)
Format metadata
ax_srat = readRDS("data/expression/axolotl_reclust/all_nuclei_clustered_highlevel_anno.RDS")
meta = read.csv("data/annotations/axolotl_all_umeta.csv",
header = T, row.names = 1)
ax_srat = AddMetaData(ax_srat, metadata = meta)
div_srat = readRDS("data/expression/axolotl_reclust/Edu_1_2_4_6_8_12_fil_highvarfeat.RDS")
Define groups
ax_meta = ax_srat@meta.data[,c("classes", "cellclusters", "regions", "sample", "chem")]
ax_meta$sample = ifelse(endsWith(rownames(ax_meta), "-1_1"), "a1_1",
ifelse(endsWith(rownames(ax_meta), "-1_2"), "a1_2",
ifelse(endsWith(rownames(ax_meta), "-1_3"), "a3_1",
ifelse(endsWith(rownames(ax_meta), "-1_4"), "a3_2", ax_meta$sample))))
meta_regs = read.csv("data/processed/multiome/WP_region_predictions.csv", header = T, row.names = 1)
newcellnames = rownames(meta_regs)
newcellnames = gsub("-a1-1", "-1_1", newcellnames)
newcellnames = gsub("-a1-2", "-1_2", newcellnames)
newcellnames = gsub("-a3-1", "-1_3", newcellnames)
newcellnames = gsub("-a3-2", "-1_4", newcellnames)
rownames(meta_regs) = newcellnames
meta_regs$all_pred_regs_top = paste0(meta_regs$pred_regions_top, "_pred")
ax_meta = merge(ax_meta, meta_regs[,c(2,4)], by = 0, all = T)
ax_meta$pred_regions_top[is.na(ax_meta$pred_regions_top)] = ax_meta$regions[is.na(ax_meta$pred_regions_top)]
ax_meta$all_pred_regs_top[is.na(ax_meta$all_pred_regs_top)] = ax_meta$regions[is.na(ax_meta$all_pred_regs_top)]
rownames(ax_meta) = ax_meta[,1]
ax_meta = ax_meta[,-1]
ax_meta = cbind(ax_meta[rownames(ax_srat@reductions$umap_harmony@cell.embeddings),],
ax_srat@reductions$umap_harmony@cell.embeddings)
ax_meta = cbind(unlist(lapply(strsplit(rownames(ax_meta), "-"), function(x) x[1])), ax_meta)
colnames(ax_meta)[1] = "cells"
div_meta = div_srat@meta.data[,c("high_level_anno", "high_level_clustering", "sample", "batch")]
div_meta = cbind(div_meta, div_srat@reductions$umap@cell.embeddings)
div_meta = cbind(unlist(lapply(strsplit(rownames(div_meta), "-"), function(x) x[1])), div_meta)
colnames(div_meta)[1] = "cells"
Subset data
common_cells = c("epen_clus_3", "epen_clus_4")
r_cells = list("hippocampus" = c("glut_SUBSET_0", "glut_SUBSET_4", "glut_SUBSET_5",
"glut_SUBSET_7", "npc_SUBSET_7","npc_SUBSET_11",
"glut_SUBSET_14", "glut_SUBSET_15", "glut_SUBSET_12",
"glut_SUBSET_16", "glut_SUBSET_17", "glut_SUBSET_3",
"npc_SUBSET_1", "npc_SUBSET_3", "npc_SUBSET_9"),
"lc" = c("glut_SUBSET_9","npc_SUBSET_7","npc_SUBSET_4", "glut_SUBSET_21", "glut_SUBSET_2",
"glut_SUBSET_25","npc_SUBSET_14","npc_SUBSET_0"),
"cl111" = c("glut_SUBSET_1", "glut_SUBSET_11", "npc_SUBSET_2","npc_SUBSET_4",
"npc_SUBSET_7", "npc_SUBSET_13"),
"eomes" = c("npc_SUBSET_7","npc_SUBSET_4", "glut_SUBSET_10","glut_SUBSET_22"),
"cl8620_ep" = c("glut_SUBSET_8","glut_SUBSET_6","glut_SUBSET_20", "epen_clus_1",
"epen_clus_7", "epen_clus_14"),
"indirect" = c("npc_SUBSET_7","npc_SUBSET_4", "glut_SUBSET_10","glut_SUBSET_22",
"glut_SUBSET_1", "glut_SUBSET_11", "npc_SUBSET_2","npc_SUBSET_4",
"npc_SUBSET_7", "npc_SUBSET_13", "glut_SUBSET_9","npc_SUBSET_7",
"npc_SUBSET_4", "glut_SUBSET_21", "glut_SUBSET_2","glut_SUBSET_25",
"npc_SUBSET_14","npc_SUBSET_0","glut_SUBSET_0", "glut_SUBSET_4",
"glut_SUBSET_5","glut_SUBSET_7", "npc_SUBSET_7","npc_SUBSET_11",
"glut_SUBSET_14", "glut_SUBSET_15", "glut_SUBSET_12", "glut_SUBSET_16",
"glut_SUBSET_17", "glut_SUBSET_3", "npc_SUBSET_1", "npc_SUBSET_3",
"npc_SUBSET_9"))
Get clusters
sub_srat = list()
for(n in names(r_cells)){
sub_srat[[n]] = ax_srat[,ax_srat$cellclusters %in% c(common_cells, r_cells[[n]])]
}
sub_srat[["all"]] = ax_srat[,ax_srat$cellclusters %in% c(common_cells, unlist(r_cells))]
Load UMAP coord
umap_l = list()
for(n in names(sub_srat)[1:5]){
n2 = gsub("cl", "", n)
umap_l[[n]] = read.csv(paste0("results/RNAvelocity/glut_corr/glutNoEp_ss_", n2, "_umap.csv"),
header = T, row.names = 1)
}
umap_l = list()
for(n in names(sub_srat)[1:5]){
n2 = gsub("cl", "", n)
umap_l[[n]] = read.csv(paste0("results/RNAvelocity/glut_corr/glutNoEp_ss_", n2, "_umap.csv"),
header = T, row.names = 1)
}
neolabs = paste0(sub_srat[[n]]@meta.data$cellclusters, "_",
sub_srat[[n]]@meta.data$RNA_snn_res.0.6)
lin = getLineages(Reductions(sub_srat[[n]], "umap")@cell.embeddings,
neolabs, start.clus= "epen_clus_3_9",
end.clus = c("glut_SUBSET_7_4", "glut_SUBSET_0_0"))
Error in if (distance > best.stats$distance) { :
missing value where TRUE/FALSE needed
n = "lc"
DimPlot(sub_srat[[n]], group.by = "cellclusters", label = T, reduction = "umap")
DimPlot(sub_srat[[n]], group.by = "RNA_snn_res.1.4", label = T, reduction = "umap")
# run slingshot
lin = getLineages(umap_l[[n]],
as.character(sub_srat[[n]]@meta.data$RNA_snn_res.1.4),
start.clus= "2", end.clus = c("3", "15"))
crv = getCurves(lin)
# plotting
pt = apply(crv@assays@data$pseudotime, 1, function(x) min(x[!is.na(x)]))
plotcol <- colors[cut(pt, breaks=100)]
plot(umap_l[[n]],
col = plotcol, asp = 1, pch = 16)
lines(SlingshotDataSet(lin), lwd = 3, col = 'black', show.constraints = TRUE)
sce = slingshot(Reductions(sub_srat[[n]], "umap")@cell.embeddings,
paste0("cl",as.character(sub_srat[[n]]@meta.data$RNA_snn_res.1)),
start.clus= "cl10", end.clus = c("cl0", "cl3"))
UMAPPlot(sub_srat[[n]], group.by = "RNA_snn_res.0.6", label = T)
library(grDevices)
library(RColorBrewer)
colors <- colorRampPalette(brewer.pal(11,'Spectral')[-6])(100)
pt = apply(sce@assays@data$pseudotime, 1, function(x) min(x[!is.na(x)]))
plotcol <- colors[cut(pt, breaks=100)]
plot(Reductions(sub_srat[[n]], "umap")@cell.embeddings, col = plotcol, pch=16, asp = 1)
lines(SlingshotDataSet(sce), lwd=2, col='black')
DimPlot(sub_srat[[n]], group.by = "cellclusters", reduction = "umap")
Prepare URD objects
Calculate DRs
for(n in names(sub_urd)){
sub_urd[[n]]@var.genes = findVariableGenes(sub_urd[[n]], set.object.var.genes=F,do.plot=F,
diffCV.cutoff=0.3, mean.min=.005, mean.max=100)
sub_urd[[n]] = calcPCA(sub_urd[[n]], mp.factor = 2)
pcSDPlot(sub_urd[[n]])
sub_urd[[n]] = calcTsne(object = sub_urd[[n]])
plotDim(sub_urd[[n]], "cellclusters")
sub_urd[[n]] = calcDM(sub_urd[[n]], knn = 150, sigma=16)
plotDimArray(sub_urd[[n]], reduction.use = "dm", dims.to.plot = 1:8,
outer.title = "Diffusion Map (Sigma 16, 150 NNs): Stage", label="cellclusters",
plot.title="", legend=F)
plotDim(sub_urd[[n]], "cellclusters", transitions.plot = 10000)
}
Pseudotime calculation
floods_l = list()
for(n in names(sub_urd)){
print(n)
# Here we use all cells from the first stage as the root
root.cells = cellsInCluster(sub_urd[[n]], clustering = "cellclusters", "epen_clus_3")
# Then we run 'flood' simulations
floods = floodPseudotime(sub_urd[[n]], root.cells = root.cells, n=250,
minimum.cells.flooded = 2, verbose=F)
# fix excessive NA
floods = floods[,colSums(!is.na(floods))>1000]
# The we process the simulations into a pseudotime
sub_urd[[n]] = floodPseudotimeProcess(sub_urd[[n]], floods, floods.name="pseudotime")
pseudotimePlotStabilityOverall(sub_urd[[n]])
plotDists(sub_urd[[n]], "pseudotime", "cellclusters", plot.title="Pseudotime by stage")
floods_l[[n]] = floods
}
Save
end_cc = list("hippocampus" = c("glut_SUBSET_0", "glut_SUBSET_7"),
"lc" = c("glut_SUBSET_9","glut_SUBSET_2"),
"cl111" = c("glut_SUBSET_1", "glut_SUBSET_11"),
"eomes" = c("glut_SUBSET_10","glut_SUBSET_22"),
"cl8620_ep" = c("glut_SUBSET_8","glut_SUBSET_6"))
subset_dat_l = list()
floods_4_l = list()
for(n in names(sub_urd)){
print(n)
# Here we use all cells from the first stage as the root
root.cells = cellsInCluster(sub_urd[[n]], clustering = "cellclusters", "epen_clus_4")
# Then we run 'flood' simulations
floods = floodPseudotime(sub_urd[[n]], root.cells = root.cells, n=250,
minimum.cells.flooded = 2, verbose=F)
# fix excessive NA
floods = floods[,colSums(!is.na(floods))>1000]
# The we process the simulations into a pseudotime
sub_urd[[n]] = floodPseudotimeProcess(sub_urd[[n]], floods, floods.name="pseudotime_4")
pseudotimePlotStabilityOverall(sub_urd[[n]])
plotDists(sub_urd[[n]], "pseudotime_4", "cellclusters", plot.title="Pseudotime by stage")
# Create a subsetted object of just those cells from the final stage
subsetdat = urdSubset(sub_urd[[n]],
cells.keep=cellsInCluster(sub_urd[[n]], "cellclusters", end_cc[[n]]))
subset_dat_l[[n]] = subsetdat
floods_4_l[[n]] = floods
}
[1] "hippocampus"
[1] "lc"
[1] "cl111"
[1] "eomes"
[1] "cl8620_ep"
Determine new identities for end tips
save(subset_dat_l, floods_l, sub_urd, floods_4_l, file = "data/processed/URD/URD_lists.RData")
axials_l = list()
for(n in names(sub_urd)){
# Use the variable genes that were calculated only on the final group of stages (which
# contain the last stage).
subset_dat_l[[n]]@var.genes = sub_urd[[n]]@var.genes[sub_urd[[n]]@var.genes %in% rownames(subset_dat_l[[n]]@logupx.data)]
# Calculate PCA and tSNE
subset_dat_l[[n]] = calcPCA(subset_dat_l[[n]], mp.factor = 1.5)
pcSDPlot(subset_dat_l[[n]])
set.seed(20)
subsetdat = calcTsne(subset_dat_l[[n]])
# Calculate graph clustering of these cells
subset_dat_l[[n]] = graphClustering(subset_dat_l[[n]],
num.nn = 50, do.jaccard=T, method="Louvain")
plotDim(subset_dat_l[[n]], "Louvain-50",
plot.title = "Louvain (50 NN) graph clustering", point.size=3)
plotDim(subset_dat_l[[n]], "cellclusters", plot.title = "cellclusters", point.size=3)
# Copy cluster identities from axial.6somite object to a new clustering ("tip.clusters") in the full axial object.
sub_urd[[n]]@group.ids[rownames(subset_dat_l[[n]]@group.ids), "tip.clusters"] = subset_dat_l[[n]]@group.ids$`Louvain-50`
# Determine the parameters of the logistic used to bias the transition probabilities.
# The procedure is relatively robust to this parameter, but the cell numbers may need to be
# modified for larger or smaller data sets.
axial.ptlogistic = pseudotimeDetermineLogistic(sub_urd[[n]], "pseudotime",
optimal.cells.forward=20, max.cells.back=20,
do.plot = T)
axial.biased.tm = as.matrix(pseudotimeWeightTransitionMatrix(sub_urd[[n]], "pseudotime",
logistic.params=axial.ptlogistic))
axials_l[[n]] = list("log" = axial.ptlogistic, "tm" = axial.biased.tm)
}
[1] "2022-05-24 19:04:04: Centering and scaling data."
[1] "2022-05-24 19:04:07: Removing genes with no variation."
[1] "2022-05-24 19:04:08: Calculating PCA."
[1] "2022-05-24 19:09:24: Estimating significant PCs."
[1] "Marchenko-Pastur eigenvalue null upper bound: 3.85101341791152"
[1] "11 PCs have eigenvalues larger than 1.5 times null upper bound."
[1] "Storing 22 PCs."
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
[1] "Mean pseudotime back (~20 cells) 0.00114130123513062"
[1] "Chance of accepted move to equal pseudotime is 0.5"
[1] "Mean pseudotime forward (~20 cells) -0.00114130123513062"
[1] "2022-05-24 19:10:19: Centering and scaling data."
[1] "2022-05-24 19:10:24: Removing genes with no variation."
[1] "2022-05-24 19:10:25: Calculating PCA."
[1] "2022-05-24 19:15:31: Estimating significant PCs."
[1] "Marchenko-Pastur eigenvalue null upper bound: 4.99020501012741"
[1] "16 PCs have eigenvalues larger than 1.5 times null upper bound."
[1] "Storing 32 PCs."
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
[1] "Mean pseudotime back (~20 cells) 0.00246109710635848"
[1] "Chance of accepted move to equal pseudotime is 0.5"
[1] "Mean pseudotime forward (~20 cells) -0.00246109710635848"
[1] "2022-05-24 19:15:53: Centering and scaling data."
[1] "2022-05-24 19:15:57: Removing genes with no variation."
[1] "2022-05-24 19:15:58: Calculating PCA."
[1] "2022-05-24 19:20:34: Estimating significant PCs."
[1] "Marchenko-Pastur eigenvalue null upper bound: 4.8946219896864"
[1] "9 PCs have eigenvalues larger than 1.5 times null upper bound."
[1] "Storing 18 PCs."
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
[1] "Mean pseudotime back (~20 cells) 0.00277521046018166"
[1] "Chance of accepted move to equal pseudotime is 0.5"
[1] "Mean pseudotime forward (~20 cells) -0.00277521046018166"
[1] "2022-05-24 19:20:55: Centering and scaling data."
[1] "2022-05-24 19:20:57: Removing genes with no variation."
[1] "2022-05-24 19:20:58: Calculating PCA."
[1] "2022-05-24 19:21:51: Estimating significant PCs."
[1] "Marchenko-Pastur eigenvalue null upper bound: 8.02811103081175"
[1] "10 PCs have eigenvalues larger than 1.5 times null upper bound."
[1] "Storing 20 PCs."
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
[1] "Mean pseudotime back (~20 cells) 0.0052763887436612"
[1] "Chance of accepted move to equal pseudotime is 0.5"
[1] "Mean pseudotime forward (~20 cells) -0.0052763887436612"
[1] "2022-05-24 19:21:59: Centering and scaling data."
[1] "2022-05-24 19:22:03: Removing genes with no variation."
[1] "2022-05-24 19:22:03: Calculating PCA."
[1] "2022-05-24 19:25:52: Estimating significant PCs."
[1] "Marchenko-Pastur eigenvalue null upper bound: 4.55584963964571"
[1] "12 PCs have eigenvalues larger than 1.5 times null upper bound."
[1] "Storing 24 PCs."
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
[1] "Mean pseudotime back (~20 cells) 0.00446986465888735"
[1] "Chance of accepted move to equal pseudotime is 0.5"
[1] "Mean pseudotime forward (~20 cells) -0.00446986465888735"
Random walk simulations
axials_4_l = list()
for(n in names(sub_urd)){
# Use the variable genes that were calculated only on the final group of stages (which
# contain the last stage).
subset_dat_l[[n]]@var.genes = sub_urd[[n]]@var.genes[sub_urd[[n]]@var.genes %in% rownames(subset_dat_l[[n]]@logupx.data)]
# Calculate PCA and tSNE
subset_dat_l[[n]] = calcPCA(subset_dat_l[[n]], mp.factor = 1.5)
pcSDPlot(subset_dat_l[[n]])
set.seed(20)
subsetdat = calcTsne(subset_dat_l[[n]])
# Calculate graph clustering of these cells
subset_dat_l[[n]] = graphClustering(subset_dat_l[[n]],
num.nn = 50, do.jaccard=T, method="Louvain")
plotDim(subset_dat_l[[n]], "Louvain-50",
plot.title = "Louvain (50 NN) graph clustering", point.size=3)
plotDim(subset_dat_l[[n]], "cellclusters", plot.title = "cellclusters", point.size=3)
# Copy cluster identities from axial.6somite object to a new clustering ("tip.clusters") in the full axial object.
sub_urd[[n]]@group.ids[rownames(subset_dat_l[[n]]@group.ids), "tip.clusters"] = subset_dat_l[[n]]@group.ids$`Louvain-50`
# Determine the parameters of the logistic used to bias the transition probabilities.
# The procedure is relatively robust to this parameter, but the cell numbers may need to be
# modified for larger or smaller data sets.
axial.ptlogistic = pseudotimeDetermineLogistic(sub_urd[[n]], "pseudotime_4",
optimal.cells.forward=20, max.cells.back=20,
do.plot = T)
axial.biased.tm = as.matrix(pseudotimeWeightTransitionMatrix(sub_urd[[n]], "pseudotime_4",
logistic.params=axial.ptlogistic))
axials_4_l[[n]] = list("log" = axial.ptlogistic, "tm" = axial.biased.tm)
}
[1] "2022-05-25 00:21:49: Centering and scaling data."
[1] "2022-05-25 00:21:52: Removing genes with no variation."
[1] "2022-05-25 00:21:53: Calculating PCA."
[1] "2022-05-25 00:26:13: Estimating significant PCs."
[1] "Marchenko-Pastur eigenvalue null upper bound: 3.85101341791152"
[1] "11 PCs have eigenvalues larger than 1.5 times null upper bound."
[1] "Storing 22 PCs."
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
[1] "Mean pseudotime back (~20 cells) 0.00113280913779219"
[1] "Chance of accepted move to equal pseudotime is 0.5"
[1] "Mean pseudotime forward (~20 cells) -0.00113280913779219"
[1] "2022-05-25 00:27:07: Centering and scaling data."
[1] "2022-05-25 00:27:11: Removing genes with no variation."
[1] "2022-05-25 00:27:12: Calculating PCA."
[1] "2022-05-25 00:33:12: Estimating significant PCs."
[1] "Marchenko-Pastur eigenvalue null upper bound: 4.99020501012741"
[1] "16 PCs have eigenvalues larger than 1.5 times null upper bound."
[1] "Storing 32 PCs."
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
[1] "Mean pseudotime back (~20 cells) 0.00265125322040561"
[1] "Chance of accepted move to equal pseudotime is 0.5"
[1] "Mean pseudotime forward (~20 cells) -0.00265125322040561"
[1] "2022-05-25 00:33:34: Centering and scaling data."
[1] "2022-05-25 00:33:38: Removing genes with no variation."
[1] "2022-05-25 00:33:38: Calculating PCA."
[1] "2022-05-25 00:38:07: Estimating significant PCs."
[1] "Marchenko-Pastur eigenvalue null upper bound: 4.8946219896864"
[1] "9 PCs have eigenvalues larger than 1.5 times null upper bound."
[1] "Storing 18 PCs."
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
[1] "Mean pseudotime back (~20 cells) 0.00298964955430957"
[1] "Chance of accepted move to equal pseudotime is 0.5"
[1] "Mean pseudotime forward (~20 cells) -0.00298964955430957"
[1] "2022-05-25 00:38:26: Centering and scaling data."
[1] "2022-05-25 00:38:28: Removing genes with no variation."
[1] "2022-05-25 00:38:28: Calculating PCA."
[1] "2022-05-25 00:39:17: Estimating significant PCs."
[1] "Marchenko-Pastur eigenvalue null upper bound: 8.02811103081175"
[1] "10 PCs have eigenvalues larger than 1.5 times null upper bound."
[1] "Storing 20 PCs."
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
[1] "Mean pseudotime back (~20 cells) 0.00562420847777713"
[1] "Chance of accepted move to equal pseudotime is 0.5"
[1] "Mean pseudotime forward (~20 cells) -0.00562420847777713"
[1] "2022-05-25 00:39:26: Centering and scaling data."
[1] "2022-05-25 00:39:28: Removing genes with no variation."
[1] "2022-05-25 00:39:29: Calculating PCA."
[1] "2022-05-25 00:43:04: Estimating significant PCs."
[1] "Marchenko-Pastur eigenvalue null upper bound: 4.55584963964571"
[1] "12 PCs have eigenvalues larger than 1.5 times null upper bound."
[1] "Storing 24 PCs."
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
[1] "Mean pseudotime back (~20 cells) 0.00469503083919989"
[1] "Chance of accepted move to equal pseudotime is 0.5"
[1] "Mean pseudotime forward (~20 cells) -0.00469503083919989"
subfix_l = list()
for(n in names(sub_urd)){
subsetfix = urdSubset(sub_urd[[n]],
cells.keep=rownames(sub_urd[[n]]@meta)[rownames(sub_urd[[n]]@meta)%in%rownames(axials_l[[n]][["tm"]])])
root.cells = cellsInCluster(sub_urd[[n]], clustering = "cellclusters", "epen_clus_3")
# Simulate the biased random walks from each tip
axial.walks = simulateRandomWalksFromTips(subsetfix, tip.group.id="tip.clusters",
root.cells=root.cells, n.per.tip = 25000,
transition.matrix = axials_l[[n]][["tm"]],
root.visits = 1, max.steps = 10000, verbose = F)
# Process the biased random walks into visitation frequencies
subsetfix = processRandomWalksFromTips(subsetfix, axial.walks, verbose = F)
axials_l[[n]][["walks"]] = axial.walks
subfix_l[[n]] = subsetfix
}
Save
subfix_4_l = list()
for(n in names(sub_urd)){
subsetfix = urdSubset(sub_urd[[n]],
cells.keep=rownames(sub_urd[[n]]@meta)[rownames(sub_urd[[n]]@meta)%in%rownames(axials_4_l[[n]][["tm"]])])
root.cells = cellsInCluster(sub_urd[[n]], clustering = "cellclusters", "epen_clus_4")
# Simulate the biased random walks from each tip
axial.walks = simulateRandomWalksFromTips(subsetfix, tip.group.id="tip.clusters",
root.cells=root.cells, n.per.tip = 25000,
transition.matrix = axials_4_l[[n]][["tm"]],
root.visits = 1, max.steps = 10000, verbose = F)
# Process the biased random walks into visitation frequencies
subsetfix = processRandomWalksFromTips(subsetfix, axial.walks, verbose = F)
axials_4_l[[n]][["walks"]] = axial.walks
subfix_4_l[[n]] = subsetfix
}
Check tip clusters
save(subset_dat_l, floods_l, floods_4_l, sub_urd, axials_l, subfix_l, axials_4_l, subfix_4_l,
file = "data/processed/URD/URD_lists.RData")
Plot trees
max_vals = list()
for(n in names(subfix_4_l)){
plt1 = plotDim(subfix_4_l[[n]], "cellclusters", plot.title=n)
plt2 = plotDim(subfix_4_l[[n]], "tip.clusters", plot.title=n)
print(plt1+plt2)
max_vals[[n]] = sort(tapply(subfix_4_l[[n]]@pseudotime$pseudotime_4,
subfix_4_l[[n]]@group.ids$tip.clusters, max))
}
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Warning: `guides(<scale> = FALSE)` is deprecated. Please use `guides(<scale> = "none")` instead.
Save trees
for(n in names(tree_plt_l)){
pdf(paste0("results/pseudotime/SS_", n, "_tree.pdf"), height = 3.5, width = 4)
print(tree_plt_l[[n]])
dev.off()
}
Load RNA velocity pseudotime
for(n in names(tree_plt_l)){
pdf(paste0("results/pseudotime/SS_", n, "_tree.pdf"), height = 3.5, width = 4)
print(tree_plt_l[[n]])
dev.off()
}
Plot pseudotime comparisons
glut_dat_df = read.csv("results/RNAvelocity/glut_corr/heatmap_gen/glut_dat_df.csv",
header = T, row.names = 1)
newcellnames = rownames(glut_dat_df)
newcellnames = gsub("_D1", "-1_1_5", newcellnames)
newcellnames = gsub("_D2", "-1_2_5", newcellnames)
newcellnames = gsub("_L1", "-1_3_5", newcellnames)
newcellnames = gsub("_L2", "-1_4_5", newcellnames)
newcellnames = gsub("_M1", "-1_5_5", newcellnames)
newcellnames = gsub("_M2", "-1_6_5", newcellnames)
newcellnames = gsub("_a1_1", "-1_1", newcellnames)
newcellnames = gsub("_a1_2", "-1_2", newcellnames)
newcellnames = gsub("_a3_1", "-1_3", newcellnames)
newcellnames = gsub("_a3_2", "-1_4", newcellnames)
rownames(glut_dat_df) = newcellnames
Save comparisons
plt_comp_l = list()
for(n in names(subfix_l)){
plot_df = merge(glut_dat_df, subfix_4_l[[n]]@pseudotime, by = 0)
plot_df$subclasses = subfix_4_l[[n]]@meta[plot_df$Row.names,"subclasses"]
plot_df$subclasses = factor(plot_df$subclasses,
levels = c("Ependymal", "NPC", "Glutamatergic"))
cpe = round(cor(plot_df$newpt, plot_df$pseudotime_4, method = "pe"), 2)
plt_comp_l[[n]] = ggplot(plot_df, aes(x = newpt, y = pseudotime_4,
colour = cellclusters, size = subclasses))+
geom_point()+
scale_colour_manual(values = cols_cc, limits = force)+
scale_size_manual(values = c(0.5, 0.95, 1.35))+
labs(title = n, subtitle = paste0("PCC = ", cpe),
x = "Pseudotime (scVelo)", y = "Pseudotime (URD)")+
guides(colour = guide_legend(), size = guide_legend())+
theme_classic()+
theme(aspect.ratio = 1,
axis.text = element_text(colour = "black", size = 6),
axis.title = element_text(size = 7),
plot.title = element_text(size = 8),
plot.subtitle = element_text(size = 7),
legend.text = element_text(size = 6),
legend.title = element_text(size = 7),
legend.key.size = unit(0.35, "cm"))
}
Make proportion plots
for(n in names(plt_comp_l)){
pdf(paste0("results/pseudotime/SS_", n, "_comp.pdf"), height = 4, width = 4.5)
print(plt_comp_l[[n]])
dev.off()
}
Save proportions
smo_prop_list = list()
for(n in names(subfix_4_l)){
# subset data
submeta = data.frame("pseudotime" = subfix_4_l[[n]]@pseudotime$pseudotime_4,
"cellclusters" = subfix_4_l[[n]]@meta$cellclusters)
med_pt_cc = sort(tapply(submeta$pseudotime, submeta$cellclusters, median))
lt_bins = cut(submeta$pseudotime, 100) # 100 equally-sized bins
plot_df = data.frame(bins = lt_bins,
cst = as.character(submeta$cellclusters))
tab_df = table(plot_df$bins, plot_df$cst)
# remove cell types that are too rare (<5%)
tab_df = reshape2::melt(tab_df/rowSums(tab_df))
usecl = tapply(tab_df$value, tab_df$Var2, function(x) any(x>0.05))
plot_df = plot_df[plot_df$cst %in% names(usecl)[usecl],]
tab_df = table(plot_df$bins,plot_df$cst)
# normalise by cell type abundance
prop_w = prop.table(table(plot_df$cst))
tab_df = t(apply(tab_df, 1, function(x) x/prop_w[colnames(tab_df)]))
# reshape
tab_df = reshape2::melt(tab_df/rowSums(tab_df))
tab_df$Var2 = as.character(tab_df$Var2)
# prevent discontinuity by copying the previous column (likely not happening)
tab_df = tab_df[order(tab_df$Var1, decreasing = F),]
for(i in unique(tab_df$Var1)){
if(any(is.nan(tab_df$value[tab_df$Var1==i]))){
tab_df$value[tab_df$Var1==i] = prev
}
prev = tab_df$value[tab_df$Var1==i]
}
# smoothen the proportions (and force constrain to 0-1)
tab_df2 = tab_df
tab_df2$value2 = tab_df2$value
for(i in unique(tab_df2$Var2)){
fff = loess(value~as.numeric(Var1), data = tab_df2[tab_df2$Var2==i,],
span = 0.5)
pred = predict(fff)
pred[pred>1] = 1
pred[pred<0] = 0
tab_df2$value2[tab_df2$Var2==i] = pred
}
# force constrain each interval to 0-1 by doing proportion
for(i in unique(tab_df2$Var1)){
tab_df2$value2[tab_df2$Var1==i] = tab_df2$value2[tab_df2$Var1==i]/sum(tab_df2$value2[tab_df2$Var1==i])
}
tab_df2$major = unlist(lapply(strsplit(tab_df2$Var2, "_"), function(x) x[1]))
res = list()
for(nnn in unique(tab_df2$Var1)){
ss=tapply(tab_df2[tab_df2$Var1==nnn,"value2"], tab_df2[tab_df2$Var1==nnn,"major"], sum)
res[[nnn]] = which.max(ss)
}
smo_prop_list[[n]] = ggplot(tab_df2, aes(x = Var1, y = value2, group = Var2, fill = Var2))+
geom_area()+
scale_y_continuous(expand = c(0,0))+
scale_fill_manual(values = cols_cc[names(cols_cc) %in% tab_df2$Var2])+
labs(x = "Bins", y = "Proportion", fill = "Cell type")+
theme_classic()+
theme(axis.text.x = element_blank(),
axis.text.y = element_text(size = 6.5, colour = "black"),
axis.ticks.x = element_blank(),
axis.line = element_blank(),
axis.title = element_text(size = 7),
legend.text = element_text(size = 6),
legend.title = element_text(size = 7),
legend.key.size = unit(0.4, "cm"))
}